home *** CD-ROM | disk | FTP | other *** search
- /* National Institute of Standards and Technology (NIST)
- /* National Computer System Laboratory (NCSL)
- /* Office Systems Engineering (OSE) Group
- /* ********************************************************************
- /* D I S C L A I M E R
- /* (March 8, 1989)
- /*
- /* There is no warranty for the NIST NCSL OSE SGML parser and/or the NIST
- /* NCSL OSE SGML parser validation suite. If the SGML parser and/or
- /* validation suite is modified by someone else and passed on, NIST wants
- /* the parser's recipients to know that what they have is not what NIST
- /* distributed, so that any problems introduced by others will not
- /* reflect on our reputation.
- /*
- /* Policies
- /*
- /* 1. Anyone may copy and distribute verbatim copies of the SGML source
- /* code as received in any medium.
- /*
- /* 2. Anyone may modify your copy or copies of SGML parser source code or
- /* any portion of it, and copy and distribute such modifications provided
- /* that all modifications are clearly associated with the entity that
- /* performs the modifications.
- /*
- /* NO WARRANTY
- /* ===========
- /*
- /* NIST PROVIDES ABSOLUTELY NO WARRANTY. THE SGML PARSER AND VALIDATION
- /* SUITE ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
- /* EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
- /* THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS
- /* WITH YOU. SHOULD THE SGML PARSER OR VALIDATION SUITE PROVE DEFECTIVE,
- /* YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
- /*
- /* IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW WILL NIST BE LIABLE FOR
- /* DAMAGES, INCLUDING ANY LOST PROFITS, LOST MONIES, OR OTHER SPECIAL,
- /* INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR
- /* INABILITY TO USE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA
- /* BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY THIRD PARTIES OR A
- /* FAILURE OF THE PROGRAM TO OPERATE WITH PROGRAMS NOT DISTRIBUTED BY
- /* NIST) THE PROGRAM, EVEN IF YOU HAVE BEEN ADVISED OF THE POSSIBILITY OF
- /* SUCH DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY.
- */
-
- /************************************************************************/
- /* TITLE: SGML PARSER */
- /* SYSTEM: DTD PROCESSOR */
- /* SUBSYSTEM: */
- /* SOURCE FILE: DTD.C */
- /* AUTHOR: Jim Heath */
- /* */
- /* DATE CREATED: */
- /* LAST MODIFIED: */
- /* */
- /* REVISIONS */
- /* WHEN WHO WHY */
- /************************************************************************/
- #include <stdio.h>
- #include <setjmp.h>
- #include <unistd.h>
-
- #include "qntyset.h"
- #include "dtd.h"
- #include "dtdfncs.h"
- #include "dtdglbl.h"
- #include "dtddefs.h"
-
/* Capacity, in bytes, of the normalized-declaration buffer below. */
#define MAXDECLSIZE 2048

/* Buffer in which dodecl() accumulates the text of the current declaration. */
static char decl[MAXDECLSIZE];

/* Not referenced anywhere in the visible part of this file. */
#define DEMO 0

/*
 * Read by processinput() before and after dogetds(); presumably the number
 * of currently open marked sections (see get_ms_closes()) -- confirm
 * against the marked-section code.
 */
int num_open_ms = 0;
-
- /* ============================================================ */
- main(argc, argv)
- int argc;
- char *argv[];
- {
- #undef PROFILE
- char *targv[2];
- REGISTER int j;
- filemask = 0;
- FUNCTRACE("main");
- #ifdef PROFILE
- argc = 2;
- targv[0] = "parse1";
- targv[1] = "testdoc";
- doopts(argc, targv);
- dofiles(targv);
- #else
- doopts(argc, argv);
- dofiles(argv);
- #endif
- SETENV:
- if (setjmp(Xenv) != 0) {
- while(1) {
- if ((j = jgetc()) == EOF)
- terminate(1, "EOF while recovering for syntax error");
- if (j == MDC) {
- stackinit();
- goto SETENV;
- }
- }
- }
- processinput();
- }
- /* ============================================================ */
- void processinput()
- {
- int tempnum, elements = 0, attributes = 0,
- entities = 0;
- enum {
- TRYPIO, GETDS, GETMDO, GETKEYWORD, TRYSGML }
- state = TRYSGML;
-
- do {
- switch(state) {
- case TRYSGML:
- dogetsgml(); /* look for and strip sgml declaration */
- state = GETDS;
- break;
- case GETDS:
- tempnum = num_open_ms;
- CLEARFLAG(IN_DECL);
- dogetds(elements, attributes, entities); /* arguments are for show count */
- if (num_open_ms > 0)
- get_ms_closes();
- if (tempnum == num_open_ms)
- state = GETMDO;
- else
- state = GETDS;
- break;
- case TRYPIO:
- dopio();
- state = GETDS;
- break;
- case GETMDO:
- CLRDECL();
- SETFLAG(IN_DECL);
- if (TESTFLAG(IN_ENTITY))
- SETFLAG(DECL_IS_IN_ENTITY);
- else
- CLEARFLAG(DECL_IS_IN_ENTITY);
- if ((inpMDO()) != GOOD)
- state = TRYPIO;
- else
- state = GETKEYWORD;
- break;
- case GETKEYWORD:
- dokeyword();
- state = GETDS;
- }
- } while(1);
- }
- /* ============================================================ */
- /* ============================================================ */
- void doopts(argc, argv)
- int argc;
- char *argv[];
- {
- REGISTER int j;
- FUNCTRACE("doopts");
- printf("argc = %d\n", argc);
- for (j = 0; j < argc; j++)
- printf("argv[%d] = %s\n", j, argv[j]);
- for (j = 2; j < argc; j++) {
- switch(*(argv[j] + 1)) {
- case 'P':
- case 'p':
- strcpy(entfilename, (argv[j] + 2));
- break;
- case 'T':
- case 't':
- strcpy(pathname, (argv[j] + 2));
- if (*(pathname + strlen(pathname) -1) == ':')
- strcat(pathname, "\\");
- else if (ISALPHA(*(pathname + strlen(pathname) -1)))
- strcat(pathname, ":\\");
- break;
- case '1':
- debug = atoi((argv[j] + 2));
- break;
- case 'F':
- deletefiles = FALSE;
- break;
- case 'H':
- /* heading();*/
- break;
- default:
- terminate(1, "illegal option to PARSE1");
- }
- }
- }
- /* ============================================================ */
- void dofiles(argv)
- char *argv[];
- {
- /* create the intermediate work files */
- FUNCTRACE("dofiles");
- strcpy(treefname,pathname);
- strcat(treefname,"treefile.sgm");
-
- strcpy(symbfname,pathname);
- strcat(symbfname,"symbfile.sgm");
-
- /* create the attribute file */
- strcpy(attrfname,pathname);
- strcat(attrfname,"attrfile.sgm");
-
- /* create the attribute work file */
- strcpy(attrtname,pathname);
- strcat(attrtname,"attrtemp.sgm");
-
- /* build the dtdfile for later use */
- strcpy(dtdfname,pathname);
- strcat(dtdfname,"dtdfile1.sgm");
-
- strcpy(preffname,pathname);
- strcat(preffname,"preffile.sgm");
-
- strcpy(greffname,pathname);
- strcat(greffname,"greffile.sgm");
-
- strcpy(posfname,pathname);
- strcat(posfname,"posfile.sgm");
-
- strcpy(xcptfname,pathname);
- strcat(xcptfname,"except.sgm");
-
- strcpy(exclfname,pathname);
- strcat(exclfname,"exclusns.sgm");
-
- strcpy(inclfname,pathname);
- strcat(inclfname,"inclusns.sgm");
-
- strcpy(cmfname,pathname);
- strcat(cmfname,"cmfile.sgm");
-
- /* build the name for my input file */
- strcpy(docfname, argv[1]);
-
- unlinkall(TRUE);
-
- treefile = safecreat(treefname, TREEFILE);
- symbfile = safecreat(symbfname, SYMBFILE);
- attrtemp = safecreat(attrtname, ATTRTEMP);
- preffile = safecreat(preffname, PREFFILE);
- greffile = safecreat(greffname, GREFFILE);
- docfile = safefopen(docfname, "r", DOCFILE);
- cmfile = safefopen(cmfname, "wb", CMFILE);
- inclfile = safecreat(inclfname, INCLFILE);
- exclfile = safecreat(exclfname, EXCLFILE);
- }
- /* ============================================================ */
- void dodecl(clrflag, c, inptr, outptr)
- int clrflag;
- char c;
- REGISTER char *inptr;
- char **outptr;
- {
- static char *declptr = decl;
-
-
- if(clrflag == ON){
- memset(decl, '\0', MAXDECLSIZE);
- declptr = decl;
- return;
- }
- if(c != NULL){
- if(declptr != (decl + MAXDECLSIZE)){
- *declptr++ = c;
- *declptr = '\0';
- return;
- }
- else
- terminate(1, "MAXDECLSIZE exceeded in normalized declaration buffer");
- }
- if(inptr != NULL){
- while(*inptr != '\0'){
- if(declptr == (decl + MAXDECLSIZE))
- terminate(1, "MAXDECLSIZE exceeded in normalized declaration buffer");
- *declptr++ = *inptr++;
- *declptr = '\0';
- }
- return;
- }
- if(outptr != NULL){
- *outptr = decl;
- return;
- }
- }
- /* ============================================================ */
- void writeposition()
- {
- long position;
- int posfile;
- position = ftell(docfile);
- if (position == -1L)
- terminate(1, "failure on lseek");
- posfile = safecreat(posfname, POSFILE);
- safewrite(posfile, (char *) &position, sizeof(position));
- safeclose(posfile, "position file", POSFILE);
- }
- /* ============================================================ */
/* Print the three declaration totals to stdout, framed by blank lines. */
static void showcounts(int elements, int attributes, int entities)
{
    printf("\ntotal elements = %d\n"
           "total attributes = %d\n"
           "total entities = %d\n\n",
           elements, attributes, entities);
}
- /* ============================================================ */
- void dogetds(elements, attributes, entities)
- int elements, attributes, entities;
- {
- REGISTER int j;
- if (INPDS() == EOF) /* inpsep: gets s's */
- terminate(1, "EOF found while looking for DS");
- if ((j = jgetc()) != DSC) {
- jungetc(j);
- return;
- }
- ADDCHAR(DSC);
- if (INPPS() == EOF)
- terminate(1, "EOF found following DSC");
- if ((j = jgetc()) == MDC) {
- finddocelt();
- writeposition();
- ADDCHAR(DSC);
- closeall();
- showcounts(elements, attributes, entities);
- if (strlen(rootelt) != 0) {
- printf("\n\nERROR - no element declaration for root element\n");
- errflag = 1;
- }
- if (errflag == 0)
- bldsymbtbl();
- else {
- unlinkall(TRUE);
- exit(1);
- }
- }
- else
- if (j == DSC) {
- jungetc(']');
- jungetc(']');
- }
- else
- terminate(1, "expected MDC following DSC");
- }
- /* ============================================================ */
- void dopio()
- {
- REGISTER int j, count = 0;
- char *mydecl;
- if ((j = jgetc()) == EOF)
- terminate(1, "EOF found");
- if (j == '<') {
- ADDCHAR(j);
- if ((j = jgetc()) == EOF)
- terminate(1, "EOF within declaration"); /* not really a valid statement */
- ADDCHAR(j);
- if (j != '?')
- syntxerr("expected MDO or PIO");
- }
- else
- syntxerr("expected MDO or PIO");
- do { /* found valid PIO */
- if (count > PILEN)
- syntxerr("length of processing instruction exceeds PILEN");
- if ((j = jgetc()) == EOF)
- terminate(1, "EOF within declaration");
- ++count;
- ADDCHAR(j);
- } while(j != MDC);
- GETDECLADDR(&mydecl);
- printf("%s\n", mydecl);
- }
- /* ============================================================ */
- void finddocelt()
- {
- long currpos;
- int c;
-
- STEP1:
- while(1) {
- CLRDECL();
- while(1) {
- if ((c = getc(docfile)) == EOF)
- PERROR("unexpected EOF in finddocelt");
- switch(c) {
- case ERO:
- if ((currpos = ftell(docfile)) == -1L)
- PERROR("failure in ftell(docfile) in finddocelt");
- currpos--;
- if ((c = getc(docfile)) == EOF)
- PERROR("unexpected EOF in finddocelt");
- if (isnmstrt(c)) {
- ADDCHAR(ERO);
- ADDCHAR(c);
- syntxerr("illegal character found while searching other prolog");
- }
- if (fseek(docfile, currpos, SEEK_SET) == -1L)
- PERROR("failure in fseek(docfile) in finddocelt()");
- return;
- case SPACE:
- case RS:
- case RE:
- case TAB:
- continue;
- case '<':
- finddocelt2();
- return;
- default:
- if ((currpos = ftell(docfile)) == -1L)
- PERROR("failure in ftell(docfile) in finddocelt");
- currpos--;
- if (fseek(docfile, currpos, SEEK_SET) == -1L)
- PERROR("failure in fseek(docfile) in finddocelt()");
- return;
- }
- }
- }
- }
- /* ============================================================ */
- void finddocelt2()
- {
- long currpos;
- int c;
- char *tdecl;
-
- if ((currpos = ftell(docfile)) == -1L)
- PERROR("failure in ftell(docfile) in finddocelt2");
- currpos--;
- if ((c = getc(docfile)) == EOF)
- PERROR("unexpected EOF in finddocelt2");
- switch(c) {
- case '?':
- dopio2();
- GETDECLADDR(&tdecl);
- printf("%s\n", tdecl);
- break;
- case '!':
- if (docomment() == FALSE){
- if (fseek(docfile, currpos, SEEK_SET) == -1L)
- PERROR("failure in fseek(docfile) in finddocelt2()");
- }
- GETDECLADDR(&tdecl);
- printf("%s\n", tdecl);
- break;
- default:
- if (fseek(docfile, currpos, SEEK_SET) == -1L)
- PERROR("failure in fseek(docfile) in gotodocelt2()");
- }
- }
- /* ============================================================ */
- void dopio2()
- {
- int c;
- int len = 0;
-
- ADDSTRING("<?");
- while(1) {
- if ((c = getc(docfile)) == EOF)
- PERROR("unexpected EOF while reading other prolog");
- ADDCHAR(c);
- if (c == MDC)
- return;
- if (++len > PILEN)
- syntxerr("PI too long in other prolog");
- }
- }
- /* ============================================================ */
- int docomment()
- {
- int c;
- enum {
- sawNONE, sawONE, sawTWO }
- state;
-
- if ((c = getc(docfile)) == EOF)
- PERROR("unexpected EOF while reading other prolog");
- if (c != '-')
- return(FALSE);
- if ((c = getc(docfile)) == EOF)
- PERROR("unexpected EOF while reading other prolog");
- if (c != '-')
- return(FALSE);
- ADDSTRING("<!--");
- INSIDE_COMMENT:
- for(state = sawNONE; state != sawTWO;) {
- if ((c = getc(docfile)) == EOF)
- PERROR("unexpected EOF while reading other prolog");
- ADDCHAR(c);
- if (c == '-') {
- if (state == sawNONE)
- state = sawONE;
- else if (state == sawONE)
- state = sawTWO;
- }
- else
- state = sawNONE;
- }
- while(1) {
- if ((c = getc(docfile)) == EOF)
- PERROR("unexpected EOF while reading other prolog");
- ADDCHAR(c);
- switch(c) {
- case MDC:
- return(TRUE);
- case SPACE:
- case RE:
- case RS:
- case TAB:
- continue;
- case '-':
- if ((c = getc(docfile)) == EOF)
- PERROR("unexpected EOF while reading other prolog");
- ADDCHAR(c);
- if (c == '-')
- goto INSIDE_COMMENT;
- syntxerr("illegal declaration in other prolog");
- }
- }
- }
- /* ============================================================ */
- void dokeyword()
- {
- char namearray[NAMELEN + 1], *ptr, *mydecl;
- int j, elements = 0, attributes = 0,
- entities = 0, notations = 0;
- static int founddoctype = FALSE;
-
- ptr = namearray;
- j = INPNAME( &ptr, NAMELEN, TOUPPER);
- switch (j) {
- case KW_DOCTYPE:
- ADDSTRING("DOCTYPE");
- if (INPPS() >= 1)
- ADDCHAR(SPACE);
- else
- terminate(1, "Error while processing DOCTYPE declaration");
- /* input the document name */
- ptr = rootelt;
- if (INPNAME( &ptr, NAMELEN, TOUPPER) >= GOOD){
- ADDSTRING(rootelt);
- INPPS();
- if ((j = jgetc()) != DSO)
- terminate(1, "Error while processing DOCTYPE declaration");
- ADDSTRING(" [");
- }
- else
- terminate(1, "invalid docname");
- founddoctype = TRUE;
- break;
- case KW_ENTITY:
- ADDSTRING("ENTITY");
- if(founddoctype == FALSE)
- syntxerr("Entity declaration found before DOCTYPE declaration");
- doentity();
- entities++;
- if (TESTFLAG(IN_ENTITY) != TESTFLAG(DECL_IS_IN_ENTITY))
- syntxerr("illegal parameter entity reference");
- break;
- case KW_ELEMENT:
- ADDSTRING("ELEMENT");
- if(founddoctype == FALSE)
- syntxerr("ELEMENT declaration found before DOCTYPE declaration");
- elements += doelement();
- if (TESTFLAG(IN_ENTITY) != TESTFLAG(DECL_IS_IN_ENTITY))
- syntxerr("illegal parameter entity reference");
- break;
- case KW_ATTLIST:
- ADDSTRING("ATTLIST");
- if(founddoctype == FALSE)
- syntxerr("ATTLIST declaration found before DOCTYPE declaration");
- attributes += doattlist();
- if (TESTFLAG(IN_ENTITY) != TESTFLAG(DECL_IS_IN_ENTITY))
- syntxerr("illegal parameter entity reference");
- break;
- case KW_NOTATION:
- ADDSTRING("NOTATION");
- if(founddoctype == FALSE)
- syntxerr("NOTATION declaration found before DOCTYPE declaration");
- notations = donotation();
- if (TESTFLAG(IN_ENTITY) != TESTFLAG(DECL_IS_IN_ENTITY))
- syntxerr("illegal parameter entity reference");
- break;
- default:
- j = jgetc();
- if(j == MDC) {
- if (TESTFLAG(IN_ENTITY) != TESTFLAG(DECL_IS_IN_ENTITY))
- syntxerr("illegal parameter entity reference");
- ADDCHAR(MDC);
- break; /* start over; look for another decl */
- }
- else if(j == '-') {
- jungetc(j);
- j = INPPS(); /* check for comment */
- if ((j = jgetc()) != MDC) {
- ADDCHAR(j);
- syntxerr("illegal sequence following MDO");
- }
- if (TESTFLAG(IN_ENTITY) != TESTFLAG(DECL_IS_IN_ENTITY))
- syntxerr("illegal parameter entity reference");
- ADDCHAR(MDC);
- break;
- }
- else if (j == DSO) { /* check for marked section */
- ADDCHAR(j); /* add DSO to output */
- get_marked_section(); /* process marked section */
- break;
- }
- else {
- ADDCHAR(j);
- syntxerr("illegal sequence following MDO");
- }
- }
- GETDECLADDR(&mydecl);
- printf("%s\n", mydecl);
- }
-